%%% Written by  Daniel Lewis, 2020, for "Robust Inference in Models 
%%% Identified via Heteroskedasticity", Review of Economics and Statistics.
%
% This file reads in underlying data from the files RealYields.csv,
% NominalYields.csv, and dataforMatlabAltEstTick.csv, found in the Nakamura
% and Steinsson (2018) replication archive. It performs transformations, 
% merges the series, and extracts the variables needed for subsequent 
% analysis. It saves them as a .mat file, "dataset". 

clear; clc 

%% Read in data

% Real Forward
M_T=readtable('RealYields.csv');
date=datetime(table2array(M_T(:,1)),'InputFormat','yyyy-MM-dd');
date=flipud(date(1:end-1)); % sort from old to new, dropping oldest observation
TIPS=table2array(M_T(:,40)); % extract nominal yield and forward rate 
RF=diff(flipud(TIPS)); % take daily changes
RF=timetable(date,RF); % Form timetable

% Nominal rates
M_T=readtable('NominalYields.csv');
date=datetime(table2array(M_T(:,1)),'InputFormat','yyyy-MM-dd');
date=flipud(date(1:end-1)); % sort from old to new, dropping oldest observation
data=table2array(M_T(:,[3 63])); % extract nominal yield and forward rate 
data=diff(flipud(data)); % take daily changes
Nomrates=timetable(date,data(:,1),data(:,2),'VariableNames',{'NY','NF'}); % Form timetable

% Nakamura-Steinsson Policy Shocks
M_HF=readtable('dataforMatlabAltEstTick.csv'); % Excel file containing control days (not included in web version of NS (2018) replication data)
date=datetime(table2array(M_HF(:,1)),'InputFormat','ddMMMyyyy');
HF=timetable(date,table2array(M_HF(:,10)),table2array(M_HF(:,3)),1-table2array(M_HF(:,3)),'VariableNames',{'HF','FOMC','control'}); % Make a table for merge

%% Merge
start_date=datetime('01-01-2004','InputFormat','MM-dd-yyyy'); % earliest date for which real forward rates are available
end_date=datetime('03-19-2014','InputFormat','MM-dd-yyyy'); % last observation in Nakamura and Steinsson (2018)
dataset=synchronize(HF,Nomrates,RF);
dataset(dataset.date<start_date,:)=[]; % Trucate sample so that the same sample is used for all specifications
dataset(dataset.date>end_date,:)=[];
dataset(dataset.date>=datetime('01-07-2008','InputFormat','dd-MM-yyyy') & dataset.date<=datetime('30-06-2009','InputFormat','dd-MM-yyyy'),:)=[]; % Drop crisis period
dataset(isnan(dataset.HF),:)=[]; % drop dates that are neither policy nor control
dataset(sum(isnan(dataset.Variables),2)>0,:)=[]; % drop any dates that are unavailable for some series (in practice: 3 missing control dates due to missings in real forward rates)


%% Extract sub-samples and variables from matrix
pol_data=dataset(logical(dataset.FOMC),:); % take policy subsample
control_data=dataset(logical(dataset.control),:); % take control subsample
pol_data=table2array(pol_data); % convert to matrix
control_data=table2array(control_data); % convert to matrix
RFp=pol_data(:,6); % Real forward policy observations
NYp=pol_data(:,4); % Nominal yield policy observations
NFp=pol_data(:,5); % Nominal forward policy observations
HFp=pol_data(:,1); % Policy news policy observations
RFc=control_data(:,6); % Real forward control observations
NYc=control_data(:,4); % Nominal yield control observations
NFc=control_data(:,5); % Nominal yield control observations
HFc=control_data(:,1); % Policy news control observations
RF=[RFc;RFp];NF=[NFc;NFp];NY=[NYc;NYp];HF=[HFc;HFp]; % Stack control and policy observations into unified vectors

% Save the dataset
save('dataset','RF','NF','NY','HF','RFc','NFc','NYc','HFc','RFp','NFp','NYp','HFp')
